
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)

## Root folder of the replication archive. Point this at your local copy.
## Keep the trailing slash: the paths below are built by pasting onto it.
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

## Run the project's auxiliary regression script before loading any data.
source(file = paste0(working_dir, "Auxiliary Scripts/RegressionRunner.R"))

## Read the main dataset and the separate file of 1869 newspaper counts.
newspapers_with <- readRDS(
  file = paste0(working_dir, "Data/newspapers_historical.RDS")
)
gentzkow <- readRDS(
  file = paste0(working_dir, "Data/gentzkow1869papers.RDS")
)

## Rename the columns positionally so the key matches the main dataset.
## NOTE(review): assumes the file has exactly two columns, key first - confirm.
colnames(gentzkow) <- c("countyfips", "all_papers1869")

## Left join on countyfips: every row of the main dataset is kept, and rows
## without a match in gentzkow get NA for all_papers1869.
newspapers_with <- merge(
  x = newspapers_with,
  y = gentzkow,
  by = "countyfips",
  all.x = TRUE
)

## Where the 1869 count is missing but total_pop is observed, record the
## count as 0. Rows with missing total_pop keep their NA.
lacks_1869_count <- is.na(newspapers_with$all_papers1869) &
  !is.na(newspapers_with$total_pop)
newspapers_with$all_papers1869[lacks_1869_count] <- 0



## All five models share the same outcome, all_papers1869 - all_papers1840,
## and all include factor(state) on the right-hand side.

## Fit the levels specification for a single lag index `lag`:
##   outcome ~ log(numPost_lag<lag> + 1) + log(pop_lag<lag>) + factor(state)
## The formula is built from text and handed to lm() through do.call() with
## the data passed as a symbol, so the stored call shows the full formula and
## `data = newspapers_with`, just like a hand-written lm() call would.
fit_lagged_levels_model <- function(lag) {
  caller_env <- parent.frame()
  rhs <- sprintf(
    "log(numPost_lag%d + 1) + log(pop_lag%d) + factor(state)",
    lag, lag
  )
  spec <- as.formula(
    paste("I(all_papers1869 - all_papers1840) ~", rhs),
    env = caller_env
  )
  do.call(
    "lm",
    list(formula = spec, data = quote(newspapers_with)),
    envir = caller_env
  )
}

## One model per lag of post offices and population (lag indices 1 to 4).
m1 <- fit_lagged_levels_model(1L)
m2 <- fit_lagged_levels_model(2L)
m3 <- fit_lagged_levels_model(3L)
m4 <- fit_lagged_levels_model(4L)

## Change-on-change specification: the regressors are the raw (unlogged)
## differences between the lag-1 and lag-4 values.
m_base <- lm(
  formula = I(all_papers1869 - all_papers1840) ~
    I(numPost_lag1 - numPost_lag4) + I(pop_lag1 - pop_lag4) + factor(state),
  data = newspapers_with
)

## Regression table for the five models, in the order m1, m2, m3, m4, m_base.
fitted_models <- list(m1, m2, m3, m4, m_base)

## One column title per model, in the same order as fitted_models.
column_titles <- c(
  "Contemporaneous",
  "First Lags",
  "Second Lags",
  "Third Lags",
  "Change in Num. Newspapers (1869 - 1840)"
)

## Labels for the coefficients kept in the table: the intercept, the
## (post offices, population) pair for each of the four lag models, and the
## two change regressors from m_base.
row_labels <- c(
  "Intercept",
  rep(c("Log Post Offices", "Log Population"), times = 4),
  "Change in Num. POs (1870-1840)",
  "Change in Population (1870-1840)"
)

## omit.coef is a regular expression; coefficients whose names match it
## (including every factor(state) term) are left out of the table.
texreg(
  fitted_models,
  omit.coef = "factor|Median|Hispanic|Gini|Poverty|adults|area",
  custom.model.names = column_titles,
  custom.coef.names = row_labels
)

